
# 案例1: 微博爬虫url: https://weibo.com/u/7486643672(不能用selenium拿数据)
# 爬取狂飙所有微博的评论.每篇微博包含字段:评论内容(不包含表情,不包含子评论)
import csv
import time

import requests

def kbiao(max_id, out=None):
    """Fetch one batch of comments for the hard-coded post and write them out.

    Sends a GET request to the ``buildComments`` endpoint with ``max_id`` as a
    query parameter, prints the decoded JSON response, and writes the
    ``text_raw`` field of every entry under ``data`` as one CSV row.

    Args:
        max_id: Value sent as the ``max_id`` query parameter.
        out: Writable text file object that receives the rows. When omitted,
            the module-level handle ``f`` (opened by the script entry point)
            is used, so existing ``kbiao(max_id)`` calls keep working.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
        NameError: If ``out`` is omitted and no module-level ``f`` exists.
    """
    sink = f if out is None else out

    url = "https://weibo.com/ajax/statuses/buildComments"
    params = {
        "flow": "0",
        "is_reload": "1",
        "id": "4864455396038223",
        "is_show_bulletin": "2",
        "is_mix": "0",
        # Send a plain scalar; the previous `{max_id}` was a set literal that
        # only worked because requests iterates over iterable values.
        "max_id": str(max_id),
        "count": "20",
        "uid": "7486643672",
        "fetch_level": "0"
    }
    # NOTE(review): the cookie below is hard-coded and looks like it carries
    # login session tokens - consider loading it from the environment or a
    # config file instead of committing it to source.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.51',
        'referer': 'https://weibo.com/7486643672/Mr2MjpkOH',
        'cookie': 'SINAGLOBAL=2667160769261.232.1679726114985; ULV=1679726115229:1:1:1:2667160769261.232.1679726114985:; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WWAf4q9eGFprHvrAH6Gl0Mp5JpX5KMhUgL.FoMReoncS0zESoq2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM4S0zcSK5Re0nN; XSRF-TOKEN=BaAVlqLWDZpyNTkhpLjoOQ6s; ALF=1683851943; SSOLoginState=1681259944; SCF=AuyiSRQgWXEdWb6D5AAy_6jsmUq-MqfLws9FqgzI9tDRz8uMvEOrRTPHWfzUyB-UdOECDIucMb91sLjAkfxdNCM.; SUB=_2A25JMY35DeRhGeFG6VoX9yzOzTqIHXVqRvgxrDV8PUNbmtAGLVHjkW9NefXz4ktVtTF1mexOQ3C_hKYMc07m1MsF; WBPSESS=WSuVuvx0FtFbk__G1OA4avBjrKTCJrpVkXCEy5jBW2oYFPrBt5Ur13VH-1Ps5gB9lB1FUOQs5l_sYAg9wjSFe4-4dNh32tmg_RGtg5S6aeTdTgfSTkQaqNH5cqzLABl3Ayp0KlIPDjfFZGwc2yDXxA=='
    }

    # A timeout keeps one stalled connection from hanging the whole run.
    r = requests.get(url, params=params, headers=headers, timeout=10).json()
    print(r)

    # csv.writer quotes comments containing commas, quotes or line breaks so
    # each comment stays in a single CSV cell; '\n' matches the header row
    # written by the caller.
    writer = csv.writer(sink, lineterminator='\n')
    # A payload without 'data' is treated as an empty batch rather than
    # aborting the run; the printed response above shows what came back.
    for data in r.get('data') or []:
        # Comment text
        writer.writerow([data['text_raw']])

    # Pause once per request (not once per written comment) so the delay
    # throttles HTTP traffic instead of local file writes.
    time.sleep(1)

if __name__ == '__main__':
    # Destination file and the inclusive range of max_id values to request.
    output_path = '狂飙.csv'
    first_id, last_id = 1, 1248

    # kbiao() writes through the module-level name `f`, so the handle must
    # stay bound to exactly that name while the loop runs.
    # NOTE(review): max_id is sent as a sequential integer here - confirm the
    # endpoint treats it as a page index rather than a cursor value.
    with open(output_path, mode='w', encoding='utf-8-sig') as f:
        # Header row for the single output column.
        f.write('评论内容\n')
        for current_id in range(first_id, last_id + 1):
            kbiao(current_id)




